{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pandas-数据分组聚合的补充知识"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "内容介绍:灵活的分组方式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>科目</th>\n",
       "      <th>语文</th>\n",
       "      <th>英语</th>\n",
       "      <th>数学</th>\n",
       "      <th>班级</th>\n",
       "      <th>性别</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>姓名</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>李小刚</th>\n",
       "      <td>68</td>\n",
       "      <td>55</td>\n",
       "      <td>92</td>\n",
       "      <td>二班</td>\n",
       "      <td>男</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>小花</th>\n",
       "      <td>74</td>\n",
       "      <td>58</td>\n",
       "      <td>53</td>\n",
       "      <td>一班</td>\n",
       "      <td>女</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>张小敏</th>\n",
       "      <td>59</td>\n",
       "      <td>52</td>\n",
       "      <td>69</td>\n",
       "      <td>二班</td>\n",
       "      <td>男</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>小强</th>\n",
       "      <td>76</td>\n",
       "      <td>84</td>\n",
       "      <td>51</td>\n",
       "      <td>一班</td>\n",
       "      <td>女</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>小明</th>\n",
       "      <td>65</td>\n",
       "      <td>82</td>\n",
       "      <td>90</td>\n",
       "      <td>一班</td>\n",
       "      <td>男</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "科目   语文  英语  数学  班级 性别\n",
       "姓名                    \n",
       "李小刚  68  55  92  二班  男\n",
       "小花   74  58  53  一班  女\n",
       "张小敏  59  52  69  二班  男\n",
       "小强   76  84  51  一班  女\n",
       "小明   65  82  90  一班  男"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "idx = pd.Index(['李小刚','小花','张小敏','小强','小明'],name='姓名')\n",
    "cls = pd.Index(['语文','英语','数学'],name='科目')\n",
    "df00 = pd.DataFrame(np.random.randint(50,98,size=(5,3)),index=idx,columns=cls)\n",
    "df00['班级']=['二班','一班','二班','一班','一班']\n",
    "df00['性别']=['男','女','男','女','男']\n",
    "df00"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.通过列的字典分组:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "km = {'语文':'主科','数学':'主科','英语':'副科','物理':'副科'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>主科</th>\n",
       "      <th>副科</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>姓名</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>李小刚</th>\n",
       "      <td>160</td>\n",
       "      <td>55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>小花</th>\n",
       "      <td>127</td>\n",
       "      <td>58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>张小敏</th>\n",
       "      <td>128</td>\n",
       "      <td>52</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>小强</th>\n",
       "      <td>127</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>小明</th>\n",
       "      <td>155</td>\n",
       "      <td>82</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      主科  副科\n",
       "姓名          \n",
       "李小刚  160  55\n",
       "小花   127  58\n",
       "张小敏  128  52\n",
       "小强   127  84\n",
       "小明   155  82"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#按照列的字典（主科副科）的方式分组\n",
    "#字典对应的列名才能统计出数据。字典不对应，或者字典对应的列名比数据中的多，不影响计算。\n",
    "g5 = df00.groupby(km,axis=1).sum()\n",
    "g5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.类字典的Series数据进行分组:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "语文    主科\n",
       "数学    主科\n",
       "英语    副科\n",
       "物理    副科\n",
       "dtype: object"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#通过类似字典的Series数据进行分组\n",
    "s5 = pd.Series(km)\n",
    "s5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>主科</th>\n",
       "      <th>副科</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>姓名</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>李小刚</th>\n",
       "      <td>160</td>\n",
       "      <td>55</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>小花</th>\n",
       "      <td>127</td>\n",
       "      <td>58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>张小敏</th>\n",
       "      <td>128</td>\n",
       "      <td>52</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>小强</th>\n",
       "      <td>127</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>小明</th>\n",
       "      <td>155</td>\n",
       "      <td>82</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      主科  副科\n",
       "姓名          \n",
       "李小刚  160  55\n",
       "小花   127  58\n",
       "张小敏  128  52\n",
       "小强   127  84\n",
       "小明   155  82"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "g51 = df00.groupby(s5,axis=1).sum()\n",
    "g51"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3.通过函数进行分组："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>科目</th>\n",
       "      <th>语文</th>\n",
       "      <th>英语</th>\n",
       "      <th>数学</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>215</td>\n",
       "      <td>224</td>\n",
       "      <td>194</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>127</td>\n",
       "      <td>107</td>\n",
       "      <td>161</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "科目   语文   英语   数学\n",
       "2   215  224  194\n",
       "3   127  107  161"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#实际上传入的函数，针对行索引进行了长度计算，得出2个字和三个字两类进行分组\n",
    "df00.groupby(len).sum()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
